# -*- coding: utf-8 -*-
"""
@Time    : 2024/7/29 11:34 
@Author  : ZhangShenao 
@File    : 9.使用自定义文档分割器.py 
@Desc    : 使用自定义文档分割器
"""
from langchain_community.document_loaders import UnstructuredFileLoader

from keyword_text_splitter import KeywordTextSplitter

# Load the source text file into a list of documents.
docs = UnstructuredFileLoader('./docs/骆驼祥子.txt').load()

# Build the custom text splitter (not a loader): it is configured with the
# Chinese full stop '。' as separator and top_k=3, which per the original
# author's note means 3 keywords are extracted for every chunk.
splitter = KeywordTextSplitter(separator='。', top_k=3)

# Split the loaded documents and print the text of each resulting chunk,
# one chunk per print call.
for piece in splitter.split_documents(docs):
    print(piece.page_content)
